library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]

# Old SFARI Genes
old_SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
old_SFARI_genes = old_SFARI_genes[!duplicated(old_SFARI_genes$ID) & !is.na(old_SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
             mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
             left_join(old_SFARI_genes %>% rename(`gene-score` = 'old-gene-score') %>% 
                       dplyr::select(ID, `old-gene-score`), by = 'ID') %>%
             mutate(`old-gene-score`=ifelse(is.na(`old-gene-score`), 'Others', `old-gene-score`)) %>%
             left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
             mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
             mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`), 
                    old.gene.score=ifelse(`old-gene-score`=='Others' & Neuronal==1,'Neuronal',`old-gene-score`), 
                    significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]
dataset$gene.score = as.character(dataset$gene.score)
dataset$gene.score[dataset$gene.score=='None'] = 'Others' 


rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)


Selecting Top Modules


plot_data = dataset %>% dplyr::select(Module, MTcor) %>% distinct %>% 
            mutate(alpha = ifelse(abs(MTcor)>0.7, 1, 0.3))

top_modules = plot_data %>% arrange(desc(MTcor)) %>% filter(abs(MTcor)>0.7) %>% pull(Module) %>% as.character

Selecting Modules with a Module-Diagnosis correlation magnitude larger than 0.7 (instead of 0.9 as we did with Gandal’s dataset because the relation is not as strong in this dataset)

The 4 modules that fulfill this condition are #6EB000, #FF67A4, #00C0B3, #B2A100

ggplotly(plot_data %>% ggplot(aes(reorder(Module, -MTcor), MTcor)) + 
         geom_bar(stat='identity', fill = plot_data$Module, alpha = plot_data$alpha) + 
         geom_hline(yintercept =c(0.7, -0.7), color = 'gray', linetype = 'dotted') + 
         xlab('Modules')+ ylab('Module-Diagnosis Correlation') + theme_minimal() + 
         theme(axis.text.x = element_text(angle = 90, hjust = 1)))


The modules consist mainly of points with high (absolute) values in PC2 (which we know is related to LFC), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules

The genes belonging to the modules with the positive Module-Diagnosis correlation have positive LFC values and the genes belonging to the modules with the negative Module-Diagnosis correlation have negative values

pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% 
            left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.1, 0.6),
                   gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
            apply_labels(ImportantModules = 'Top Modules')

cro(plot_data$ImportantModules)
 #Total 
 Top Modules 
   #00C0B3  25
   #6EB000  147
   #B2A100  43
   #FF67A4  93
   Others  12854
   #Total cases  13162
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
         geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
         xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
         ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
         ggtitle('Genes belonging to the Modules with the strongest relation to ASD'))
rm(pca)




SFARI Genes


List of top 20 SFARI Genes in top modules ordered by SFARI score and Gene Significance

list_top_SFARI_genes = function(table_data, module) {
  
  t = table_data %>% filter(Module == module & `SFARI score` %in% c(1,2,3)) %>% 
      slice_head(n=20) %>% dplyr::select(-Module, -`Ensembl ID`)
 
  return(t)
}


table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
             dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% 
             arrange(gene.score, desc(abs(GS))) %>%
             dplyr::rename('Ensembl ID' = ID, 'Gene Symbol' = external_gene_id, 
                    'SFARI score' = gene.score, 'Gene Significance' = GS)

kable(list_top_SFARI_genes(table_data, top_modules[1]), 
      caption=paste0('Top SFARI Genes for Module ', top_modules[1])) %>%
      kable_styling(full_width = F)
Top SFARI Genes for Module #6EB000
Gene Symbol Gene Significance SFARI score
NAA15 0.5983095 1
UBE3A 0.4552343 1
ZNF462 0.4444701 1
PTCHD1 0.3362551 1
NFIB 0.5391484 3
ZNF827 0.5351863 3
TRIM33 0.4182244 3
PSD3 0.4108514 3
HOMER1 0.1536586 3
TTN 0.1524867 3
SRGAP3 0.1435950 3
kable(list_top_SFARI_genes(table_data, top_modules[2]), 
      caption=paste0('Top SFARI Genes for Module ', top_modules[2])) %>%
      kable_styling(full_width = F)
Top SFARI Genes for Module #FF67A4
Gene Symbol Gene Significance SFARI score
PAK2 0.5218456 2
ANXA1 0.3348133 2
CSDE1 0.2541192 2
DIP2C 0.0336524 2
DPYSL3 0.3983865 3
MAOB 0.3298410 3
NRP2 0.3046318 3
kable(list_top_SFARI_genes(table_data, top_modules[3]), 
      caption=paste0('Top SFARI Genes for Module ', top_modules[3])) %>%
      kable_styling(full_width = F)
Top SFARI Genes for Module #00C0B3
Gene Symbol Gene Significance SFARI score
kable(list_top_SFARI_genes(table_data, top_modules[4]), 
      caption=paste0('Top SFARI Genes for Module ', top_modules[4])) %>%
      kable_styling(full_width = F)
Top SFARI Genes for Module #B2A100
Gene Symbol Gene Significance SFARI score
PPP2R5D -0.5184148 1
UNC13A -0.5645527 3
rm(table_data, list_top_SFARI_genes)




Module Eigengenes


Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control

In all cases, the Eigengenes separate the behaviour between autism and control samples very clearly (p-value < \(10^{-4}\)). Modules with positive Module-Diagnosis correlation have higher eigengenes in the ASD samples and Modules with a negative correlation, in the Control samples

plot_EGs = function(module){

  plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME', module)], 
                         'Diagnosis' = datMeta$Diagnosis)

  p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + 
      geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
      ggtitle(paste0('Module ', module, '  (MTcor=',round(dataset$MTcor[dataset$Module == module][1],2),')')) + 
      stat_compare_means(method = 't.test', method.args = list(var.equal = FALSE), label = 'p.signif',
                        ref.group = 'ASD') +
      ylab('Module Eigengenes') + theme_minimal() + theme(legend.position='none')
  
  return(p)
}


# Calculate Module Eigengenes
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)

p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
p3 = plot_EGs(top_modules[3])
p4 = plot_EGs(top_modules[4])

grid.arrange(p1, p2, p3, p4, nrow=2)

rm(plot_EGs, ME_object, MEs, p1, p2, p3, p4)




Identifying representative genes for each Module


In the WGCNA pipeline, the most representative genes of each module are selected based on having a high module membership as well as a high (absolute) gene significance, so I’m going to do the same

SFARI Genes don’t seem to be more representative than the rest of the genes

create_plot = function(module){
  
  plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% 
              filter(dataset$Module==module)
  colnames(plot_data)[2] = 'Module'
  
  SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='Others'])))
  
  p = ggplotly(plot_data %>% mutate(gene.score = ifelse(gene.score =='Others', 'Not in SFARI', gene.score)) %>% 
               ggplot(aes(Module, GS, color=gene.score)) +
               geom_point(alpha=0.5, aes(ID=ID)) +  xlab('Module Membership') + ylab('Gene Significance') +
               ggtitle(paste0('Module ', module, '  (MTcor = ', 
                              round(dataset$MTcor[dataset$Module == module][1],2),')')) +
               scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,7))) + theme_minimal())
  
  return(p)
}

create_plot(top_modules[1])
create_plot(top_modules[2])
create_plot(top_modules[3])
create_plot(top_modules[4])
rm(create_plot)

Top 20 genes for each module


Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% 
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(Relevance = (MM+abs(GS))/2, 
                     gene.score = ifelse(gene.score =='Others', 'Not in SFARI', gene.score)) %>% 
              arrange(by=-Relevance) %>% top_n(20) %>% 
              dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #6EB000 (MTcor = 0.75)
Gene Symbol MM GS SFARI Score Relevance
PRKAR2A 0.8218661 0.7456689 Not in SFARI 0.7837675
SNTB2 0.7752402 0.6241026 Not in SFARI 0.6996714
FAM63B 0.6969144 0.6869672 Not in SFARI 0.6919408
TCF7L1 0.6670241 0.6994476 Not in SFARI 0.6832359
NAA15 0.7606106 0.5983095 1 0.6794601
YAF2 0.7039981 0.6335549 Not in SFARI 0.6687765
LRRCC1 0.7491550 0.5193407 Not in SFARI 0.6342478
TSHZ1 0.6383677 0.6139444 Not in SFARI 0.6261560
UBE3A 0.7945380 0.4552343 1 0.6248861
ZNF827 0.7083492 0.5351863 3 0.6217678
TROVE2 0.5979169 0.6396588 Not in SFARI 0.6187879
KLF6 0.6495152 0.5852331 Not in SFARI 0.6173741
GPM6B 0.5877091 0.6441520 Not in SFARI 0.6159305
MLTK 0.6328203 0.5933900 Not in SFARI 0.6131051
TRPS1 0.6146199 0.6037748 Not in SFARI 0.6091974
NTM 0.6346031 0.5679247 Not in SFARI 0.6012639
ZNF236 0.6896857 0.5107865 Not in SFARI 0.6002361
SOX6 0.6443396 0.5556541 Not in SFARI 0.5999969
UBE2R2 0.5948893 0.5958741 Not in SFARI 0.5953817
WASF2 0.5543124 0.6126520 Not in SFARI 0.5834822
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #FF67A4 (MTcor = 0.73)
Gene Symbol MM GS SFARI Score Relevance
S100A10 0.7952997 0.7672674 Not in SFARI 0.7812835
SERPINA3 0.8431698 0.6124417 Not in SFARI 0.7278058
C1R 0.7671195 0.6460763 Not in SFARI 0.7065979
RARRES3 0.6658650 0.6840915 Not in SFARI 0.6749782
PLEKHA4 0.6380535 0.6877998 Not in SFARI 0.6629266
CD44 0.8189879 0.4656470 Not in SFARI 0.6423174
ITGB4 0.6852345 0.5695569 Not in SFARI 0.6273957
APLNR 0.6929955 0.5557228 Not in SFARI 0.6243591
TNFRSF1A 0.6728107 0.5700682 Not in SFARI 0.6214394
MARCH3 0.6828831 0.5587051 Not in SFARI 0.6207941
AHCYL1 0.6760356 0.5326626 Not in SFARI 0.6043491
EMP3 0.6866028 0.4781196 Not in SFARI 0.5823612
PAK2 0.6289021 0.5218456 2 0.5753738
GBP1 0.6572292 0.4776008 Not in SFARI 0.5674150
VWA5A 0.6256114 0.5081924 Not in SFARI 0.5669019
PDLIM4 0.7129191 0.4075584 Not in SFARI 0.5602388
C1S 0.5827351 0.5290971 Not in SFARI 0.5559161
MAOB 0.7812057 0.3298410 3 0.5555234
METTL7B 0.5859454 0.5223654 Not in SFARI 0.5541554
GFAP 0.6525288 0.4394174 Not in SFARI 0.5459731
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00C0B3 (MTcor = -0.72)
Gene Symbol MM GS SFARI Score Relevance
DIRAS1 0.8349782 -0.6179126 Not in SFARI 0.7264454
ENTPD6 0.7798351 -0.6380428 Not in SFARI 0.7089390
SYT12 0.7900458 -0.5645101 Not in SFARI 0.6772780
AGK 0.6732774 -0.6330130 Not in SFARI 0.6531452
PTPRN 0.7665361 -0.5282348 Not in SFARI 0.6473855
SYNGR1 0.8036179 -0.4397788 Not in SFARI 0.6216984
GALNT9 0.7018693 -0.5196740 Not in SFARI 0.6107717
SLC6A7 0.7467111 -0.4549039 Not in SFARI 0.6008075
FAM168B 0.7073245 -0.4784784 Not in SFARI 0.5929015
MFN2 0.6205103 -0.4338702 Not in SFARI 0.5271902
NDUFS1 0.4642397 -0.5613344 Not in SFARI 0.5127871
KIF17 0.5792293 -0.4433830 Not in SFARI 0.5113062
L1CAM 0.6054613 -0.4074516 Not in SFARI 0.5064564
ANKRD24 0.5584268 -0.4502776 Not in SFARI 0.5043522
RALGPS1 0.5125237 -0.4467721 Not in SFARI 0.4796479
SH2D5 0.5695856 -0.3592635 Not in SFARI 0.4644245
KCNJ12 0.4564403 -0.4117266 Not in SFARI 0.4340835
ADD1 0.4618516 -0.3645726 Not in SFARI 0.4132121
MED30 0.4606382 -0.3011352 Not in SFARI 0.3808867
DAB2IP 0.4158944 -0.2878911 Not in SFARI 0.3518927
kable(top_genes[[4]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[4], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[4]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #B2A100 (MTcor = -0.74)
Gene Symbol MM GS SFARI Score Relevance
SPTBN2 0.7820609 -0.6534302 Not in SFARI 0.7177456
SPTBN4 0.7787142 -0.6397786 Not in SFARI 0.7092464
CHGB 0.8090061 -0.6073738 Not in SFARI 0.7081900
GPRASP1 0.7572204 -0.6507010 Not in SFARI 0.7039607
C1orf216 0.7600110 -0.5723119 Not in SFARI 0.6661614
MYBBP1A 0.6486548 -0.6472883 Not in SFARI 0.6479715
JAKMIP1 0.6769531 -0.5950656 Not in SFARI 0.6360093
RASGRF1 0.6768487 -0.5920387 Not in SFARI 0.6344437
MAP7D1 0.7004008 -0.5558373 Not in SFARI 0.6281191
GGT7 0.6700347 -0.5793055 Not in SFARI 0.6246701
MCM3AP 0.6985525 -0.5494518 Not in SFARI 0.6240022
UNC13A 0.6677312 -0.5645527 3 0.6161419
AZI1 0.7273648 -0.4630492 Not in SFARI 0.5952070
TMEM132D 0.6771081 -0.5123580 Not in SFARI 0.5947330
PPP2R5D 0.6544952 -0.5184148 1 0.5864550
SVOP 0.6573215 -0.5060600 Not in SFARI 0.5816908
SLC9A3R2 0.5822338 -0.5609900 Not in SFARI 0.5716119
ARPP21 0.6343947 -0.5017338 Not in SFARI 0.5680643
HEXIM1 0.6139250 -0.5149664 Not in SFARI 0.5644457
SALL2 0.6336481 -0.4804278 Not in SFARI 0.5570380
rm(create_table, i)
pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              theme_minimal() + ggtitle('Most relevant genes for top Modules')

rm(ids, pca, tg, plot_data)

Level of expression by Diagnosis for top genes, ordered by relevance (defined above): There is a visible difference in level of expression between diagnosis groups in all of these genes

create_plot = function(i){
  
  plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>% 
              melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
              left_join(top_genes[[i]], by='ID') %>%
              left_join(datMeta %>% dplyr::select(ID, Diagnosis),
                        by = c('variable'='ID')) %>% arrange(desc(Relevance))
  
  p = ggplotly(plot_data %>% mutate(external_gene_id=factor(`Gene Symbol`, 
                                    levels=unique(plot_data$`Gene Symbol`), ordered=T)) %>%
               ggplot(aes(`Gene Symbol`, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
                      ggtitle(paste0('Top Genes for Module ', top_modules[i], ' (MTcor = ',
                      round(dataset$MTcor[dataset$Module == top_modules[i]][1],2), ')')) + 
                      xlab('') + ylab('Level of Expression') +
                      theme(axis.text.x = element_text(angle = 90, hjust = 1)))
  return(p)
}

create_plot(1)
create_plot(2)
create_plot(3)
create_plot(4)
rm(create_plot)




Enrichment Analysis


Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:

file_name = './../Data/GSEA.RData'

if(file.exists(file_name)){
  load(file_name)
} else {
  ##############################################################################
  # PREPARE DATASET
  
  # Create dataset with top modules membership and removing the genes without an assigned module
  EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module)  %>% 
               filter(genes_info$Module!='gray')
  
  # Assign Entrez Gene Id to each gene
  getinfo = c('ensembl_gene_id','entrezgene')
  mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', 
                 host='feb2014.archive.ensembl.org')
  biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), 
                         values=genes_info$ID[genes_info$Module!='gray'], mart=mart)
  
  EA_dataset = biomart_output %>% dplyr::rename('ID' = ensembl_gene_id) %>%
               left_join(dataset %>% dplyr::select(ID, contains('MM.')), by='ID')

  
  ##############################################################################
  # PERFORM ENRICHMENT
  
  # Following https://yulab-smu.github.io/clusterProfiler-book/chapter8.html
  
  modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
  nPerm = 1e5 # 100 times more than the default
  
  enrichment_GO = list()         # Gene Ontology
  enrichment_DO = list()         # Disease Ontology
  enrichment_DGN = list()        # Disease Gene Networks
  enrichment_KEGG = list()       # Kyoto Encyclopedia of Genes and Genomes
  enrichment_Reactome = list()   # Reactome: Pathway db
  
  
  for(module in modules){
    cat('\n')
    cat(paste0('Module: ', which(modules == module), '/', length(modules)))
    geneList = EA_dataset[,paste0('MM.',substring(module,2))]
    names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
    geneList = sort(geneList, decreasing = TRUE)
    
    enrichment_GO[[module]] = gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni', 
                                    pvalueCutoff = 0.1, nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% 
                              data.frame
    enrichment_DO[[module]] = gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                                    nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% data.frame
    enrichment_DGN[[module]] = gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                                      nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% data.frame
    enrichment_KEGG[[module]] = gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni', 
                                        pvalueCutoff = 0.1, nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% 
                                data.frame
    enrichment_Reactome[[module]] = gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni', 
                                               pvalueCutoff = 0.1, nPerm = nPerm, verbose = F, seed = T) %>% 
                                    data.frame
    
    # Temporal save, just in case SFARI Genes enrichment fails
    save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, file=file_name)
  }
  

  ##############################################################################
  # PERFROM ENRICHMENT FOR SFARI GENES
  
  # BUILD MAPPING BETWEEN GENES AND SFARI

  # Build TERM2GENE: dataframe of 2 columns with term and gene
  term2gene = biomart_output %>% 
              left_join(genes_info %>% dplyr::select(ID, `gene-score`), 
                         by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>% 
              mutate('SFARI' = ifelse(`gene-score` != 'Others','SFARI','Others'),
                     `gene-score` = ifelse(`gene-score` != 'Others', 
                                           paste0('SFARI Score ',`gene-score`), 'Others')) %>%
              melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>% 
              dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
  
  
  # PERFORM GSEA
  enrichment_SFARI = list()
  cat('\n\nGSEA OF SFARI GENES\n')
  
  for(module in modules){
    cat('\n')
    cat(paste0('Module: ', which(modules == module), '/', length(modules)))
    geneList = EA_dataset[,paste0('MM.',substring(module,2))]
    names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
    geneList = sort(geneList, decreasing = TRUE)
      
    enrichment_SFARI[[module]] = clusterProfiler::GSEA(geneList, pAdjustMethod = 'bonferroni',  nPerm = nPerm,
                                                       TERM2GENE = term2gene, pvalueCutoff=1, maxGSSize=2e3,
                                                        verbose = FALSE, seed = TRUE) %>% data.frame
    
    # Temporal save
    save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, 
         enrichment_SFARI, file=file_name)
  }
  
  ##############################################################################
  # PERFROM ENRICHMENT FOR OLD SFARI GENES
  
  # BUILD MAPPING BETWEEN GENES AND SFARI

  # Build TERM2GENE: dataframe of 2 columns with term and gene
  term2gene = biomart_output %>% 
              left_join(genes_info %>% dplyr::select(ID, `old-gene-score`), 
                         by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>% 
              mutate('SFARI' = ifelse(`old-gene-score` != 'Others','SFARI','Others'),
                     `old-gene-score` = ifelse(`old-gene-score` != 'Others', 
                                           paste0('SFARI Score ',`old-gene-score`), 'Others')) %>%
              melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>% 
              dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
  
  
  # PERFORM GSEA
  enrichment_old_SFARI = list()
  cat('\n\nGSEA OF OLD SFARI GENES\n')
  
  for(module in modules){
    cat('\n')
    cat(paste0('Module: ', which(modules == module), '/', length(modules)))
    geneList = EA_dataset[,paste0('MM.',substring(module,2))]
    names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
    geneList = sort(geneList, decreasing = TRUE)
      
    enrichment_old_SFARI[[module]] = clusterProfiler::GSEA(geneList, pAdjustMethod = 'bonferroni',  
                                                           nPerm = nPerm, TERM2GENE = term2gene, pvalueCutoff=1, 
                                                           maxGSSize=2e3, verbose = FALSE, seed = TRUE) %>% 
                                     data.frame
    
    # Temporal save
    save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, 
         enrichment_SFARI, enrichment_old_SFARI, file=file_name)
  }
  ##############################################################################
  # Save enrichment results
  save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, 
       enrichment_SFARI, enrichment_old_SFARI, file=file_name)
  
  rm(getinfo, mart, biomart_output, module, term2gene, geneList, EA_dataset, nPerm)
}

# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI
GSEA_old_SFARI = enrichment_old_SFARI
file_name = './../Data/ORA.RData'

if(file.exists(file_name)){
  load(file_name)
} else {
  
  ##############################################################################
  # PREPARE DATASET
  
  # Create dataset with top modules membership and removing the genes without an assigned module
  EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module)  %>% 
               filter(genes_info$Module!='gray')
  
  # Assign Entrez Gene Id to each gene
  getinfo = c('ensembl_gene_id','entrezgene')
  mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', 
                 host='feb2014.archive.ensembl.org')
  biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), 
                         values=genes_info$ID[genes_info$Module!='gray'], mart=mart)
  
  EA_dataset = biomart_output %>% dplyr::rename('ID' = ensembl_gene_id) %>%
               left_join(dataset %>% dplyr::select(ID, Module), by='ID')

  
  ##############################################################################
  # PERFORM ENRICHMENT
  
  # Following https://yulab-smu.github.io/clusterProfiler-book/chapter8.html
  
  modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
  universe = EA_dataset$entrezgene %>% as.character
  
  enrichment_GO = list()         # Gene Ontology
  enrichment_DO = list()         # Disease Ontology
  enrichment_DGN = list()        # Disease Gene Networks
  enrichment_KEGG = list()       # Kyoto Encyclopedia of Genes and Genomes
  enrichment_Reactome = list()   # Reactome: Pathway db
  
  
  for(module in modules){
    
    genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
    
    enrichment_GO[[module]] = enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db, 
                                       ont = 'All', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, 
                                       qvalueCutoff = 1) %>% data.frame
    enrichment_DO[[module]] = enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                       pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
    enrichment_DGN[[module]] = enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                         pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
    enrichment_KEGG[[module]] = enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                           pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
    enrichment_Reactome[[module]] = enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                                  pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% 
                                    data.frame
  }
  
  # Temporal save, just in case SFARI Genes enrichment fails
  save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, file=file_name)
  
  
  ##############################################################################
  # PERFROM ENRICHMENT FOR SFARI GENES
  
  # BUILD MAPPING BETWEEN GENES AND SFARI

  # Build TERM2GENE: dataframe of 2 columns with term and gene
  term2gene = biomart_output %>% 
              left_join(genes_info %>% dplyr::select(ID, `gene-score`), 
                         by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>% 
              mutate('SFARI' = ifelse(`gene-score` != 'Others','SFARI','Others'),
                     `gene-score` = ifelse(`gene-score` != 'Others', 
                                           paste0('SFARI Score ',`gene-score`), 'Others')) %>%
              melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>% 
              dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
  
  
  # PERFORM GSEA
  enrichment_SFARI = list()
  
  for(module in modules){
      genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
      
      enrichment_SFARI[[module]] = enricher(gene = genes_in_module, universe = universe, 
                                            pAdjustMethod = 'bonferroni', TERM2GENE = term2gene, 
                                            pvalueCutoff = 1, qvalueCutoff = 1, maxGSSize = 50000) %>% 
                                    data.frame %>% dplyr::select(-geneID,-Description)
  }

  ##############################################################################
  # PERFROM ENRICHMENT FOR SFARI GENES
  
  # BUILD MAPPING BETWEEN GENES AND SFARI

  # Build TERM2GENE: dataframe of 2 columns with term and gene
  term2gene = biomart_output %>% 
              left_join(genes_info %>% dplyr::select(ID, `old-gene-score`), 
                         by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>% 
              mutate('SFARI' = ifelse(`old-gene-score` != 'Others','SFARI','Others'),
                     `old-gene-score` = ifelse(`old-gene-score` != 'Others', 
                                           paste0('SFARI Score ',`old-gene-score`), 'Others')) %>%
              melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>% 
              dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
  
  
  # PERFORM GSEA
  enrichment_old_SFARI = list()
  
  for(module in modules){
      genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
      
      enrichment_old_SFARI[[module]] = enricher(gene = genes_in_module, universe = universe, 
                                                pAdjustMethod = 'bonferroni', TERM2GENE = term2gene, 
                                                pvalueCutoff = 1, qvalueCutoff = 1, maxGSSize = 5e4) %>% 
                                       data.frame %>% dplyr::select(-geneID,-Description)
  }
  
  ##############################################################################
  # Save enrichment results
  save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, 
       enrichment_SFARI, enrichment_old_SFARI, file=file_name)
  
  rm(getinfo, mart, biomart_output, gene, module, term2gene, genes_in_module, EA_dataset, universe, file_name)
}

# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI
ORA_old_SFARI = enrichment_old_SFARI

rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, enrichment_SFARI, 
   enrichment_old_SFARI)


Both GSEA and ORA are commonly used to study enrichment in sets of genes, but when using them for studying our modules both have shortcomings:

modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names

module = sample(modules,1)

plot_data = dataset %>% dplyr::select(Module, paste0('MM.',gsub('#','',module))) %>% 
            mutate(in_module = substring(Module,2) == gsub('#','',module), selected_module = module) %>%
            mutate(alpha = ifelse(in_module, 0.8, 0.1))
colnames(plot_data)[2] = 'MM'

p = plot_data %>% ggplot(aes(selected_module, MM, color = in_module)) + geom_jitter(alpha = plot_data$alpha) + 
    xlab('') + ylab('Module Membership') + coord_flip() + theme_minimal() + 
    theme(legend.position = 'none')

ggExtra::ggMarginal(p, type = 'density', groupColour = TRUE, groupFill = TRUE, margins = 'x', size=1)

rm(modules, module, p, plot_data)

So perhaps it could be useful to use both methods together, since they seem to complement each other’s shortcomings very well, performing the enrichment using both methods and identifying the terms that are found to be enriched by both

Note: Since the enrichment in both methods is quite a stric restriction, we decide to relax the corrected p-value threshold (using Bonferroni correction) to 0.1.

compare_methods = function(GSEA_list, ORA_list){
  
  for(top_module in top_modules){
  
    cat(paste0('  \n  \nEnrichments for Module ', top_module, ' (MTcor=',
               round(dataset$MTcor[dataset$Module==top_module][1],2), '):  \n  \n'))
    
    GSEA = GSEA_list[[top_module]]
    ORA = ORA_list[[top_module]]
    
    cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms  \n'))
    cat(paste0('ORA has  ', nrow(ORA), ' enriched terms  \n'))
    cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods  \n  \n'))

    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% 
                           dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID') 
    
    if(nrow(plot_data)>0){
      print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>% 
                          arrange(pval_mean) %>% dplyr::select(-pval_mean) %>% 
            kable %>% kable_styling(full_width = F))
    }
  } 
}


plot_results = function(GSEA_list, ORA_list){
  
  l = htmltools::tagList()

  for(i in 1:length(top_modules)){
    
    GSEA = GSEA_list[[top_modules[i]]]
    ORA = ORA_list[[top_modules[i]]]
    
    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
    
    if(nrow(plot_data)>5){
      min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
      max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
      ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) + 
                     geom_point(aes(id = Description)) + 
                     geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
                     scale_x_continuous(limits = c(min_val, max_val)) + 
                     scale_y_continuous(limits = c(min_val, max_val)) + 
                     xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
                     scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
      l[[i]] = ggp
    }
  }
  
  return(l)
}


KEGG

compare_methods(GSEA_KEGG, ORA_KEGG)

Enrichments for Module #6EB000 (MTcor=0.75):

GSEA has 2 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FF67A4 (MTcor=0.73):

GSEA has 38 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00C0B3 (MTcor=-0.72):

GSEA has 6 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #B2A100 (MTcor=-0.74):

GSEA has 7 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods


Reactome

compare_methods(GSEA_Reactome, ORA_Reactome)

Enrichments for Module #6EB000 (MTcor=0.75):

GSEA has 9 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FF67A4 (MTcor=0.73):

GSEA has 19 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00C0B3 (MTcor=-0.72):

GSEA has 30 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods

Enrichments for Module #B2A100 (MTcor=-0.74):

GSEA has 33 enriched terms
ORA has 2 enriched terms
0 terms are enriched in both methods


Gene Ontology

compare_methods(GSEA_GO, ORA_GO)

Enrichments for Module #6EB000 (MTcor=0.75):

GSEA has 26 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FF67A4 (MTcor=0.73):

GSEA has 189 enriched terms
ORA has 12 enriched terms
2 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0006954 inflammatory response 2.417124 0.0808313 0.0024388 15/89 0.0022197
GO:0045087 innate immune response 2.407573 0.0797238 0.0746085 15/89 0.0200354

Enrichments for Module #00C0B3 (MTcor=-0.72):

GSEA has 58 enriched terms
ORA has 2 enriched terms
0 terms are enriched in both methods

Enrichments for Module #B2A100 (MTcor=-0.74):

GSEA has 62 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods


Disease Ontology

compare_methods(GSEA_DO, ORA_DO)

Enrichments for Module #6EB000 (MTcor=0.75):

GSEA has 29 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FF67A4 (MTcor=0.73):

GSEA has 192 enriched terms
ORA has 6 enriched terms
5 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
DOID:7148 rheumatoid arthritis 2.476518 0.0107916 0.0015561 15/58 0.0013270
DOID:65 connective tissue disease 2.316857 0.0104699 0.0051872 20/58 0.0022118
DOID:0080001 bone disease 2.376433 0.0105371 0.0082591 18/58 0.0023478
DOID:848 arthritis 2.411146 0.0106846 0.0281152 15/58 0.0057780
DOID:3342 bone inflammation disease 2.441100 0.0106604 0.0406522 15/58 0.0057780

Enrichments for Module #00C0B3 (MTcor=-0.72):

GSEA has 66 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #B2A100 (MTcor=-0.74):

GSEA has 50 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods


Disease Gene Network

compare_methods(GSEA_DGN, ORA_DGN)

Enrichments for Module #6EB000 (MTcor=0.75):

GSEA has 23 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FF67A4 (MTcor=0.73):

GSEA has 305 enriched terms
ORA has 6 enriched terms
4 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
umls:C0019163 Hepatitis B 2.330456 0.0399615 0.0174600 15/84 0.0126828
umls:C0029408 Degenerative polyarthritis 1.924231 0.0394216 0.0604659 16/84 0.0126828
umls:C0024115 Lung diseases 2.426405 0.0415188 0.0647541 9/84 0.0126828
umls:C0035222 Respiratory Distress Syndrome, Adult 2.348761 0.0424784 0.0956488 6/84 0.0126828

Enrichments for Module #00C0B3 (MTcor=-0.72):

GSEA has 52 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #B2A100 (MTcor=-0.74):

GSEA has 48 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] WGCNA_1.69             fastcluster_1.1.25     dynamicTreeCut_1.63-1 
##  [4] org.Hs.eg.db_3.8.2     AnnotationDbi_1.46.1   IRanges_2.18.3        
##  [7] S4Vectors_0.22.1       Biobase_2.44.0         BiocGenerics_0.30.0   
## [10] DOSE_3.10.2            ReactomePA_1.28.0      clusterProfiler_3.12.0
## [13] biomaRt_2.40.5         kableExtra_1.1.0       knitr_1.28            
## [16] doParallel_1.0.15      iterators_1.0.12       foreach_1.5.0         
## [19] polycor_0.7-10         expss_0.10.2           ggpubr_0.2.5          
## [22] magrittr_1.5           GGally_1.5.0           gridExtra_2.3         
## [25] viridis_0.5.1          viridisLite_0.3.0      RColorBrewer_1.1-2    
## [28] dendextend_1.13.4      plotly_4.9.2           glue_1.4.1            
## [31] reshape2_1.4.4         forcats_0.5.0          stringr_1.4.0         
## [34] dplyr_1.0.0            purrr_0.3.4            readr_1.3.1           
## [37] tidyr_1.1.0            tibble_3.0.1           ggplot2_3.3.2         
## [40] tidyverse_1.3.0       
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_1.1.0            RSQLite_2.2.0              
##   [3] htmlwidgets_1.5.1           grid_3.6.3                 
##   [5] BiocParallel_1.18.1         munsell_0.5.0              
##   [7] codetools_0.2-16            preprocessCore_1.46.0      
##   [9] miniUI_0.1.1.1              withr_2.2.0                
##  [11] colorspace_1.4-1            GOSemSim_2.10.0            
##  [13] highr_0.8                   rstudioapi_0.11            
##  [15] ggsignif_0.6.0              labeling_0.3               
##  [17] urltools_1.7.3              GenomeInfoDbData_1.2.1     
##  [19] polyclip_1.10-0             bit64_0.9-7                
##  [21] farver_2.0.3                vctrs_0.3.1                
##  [23] generics_0.0.2              xfun_0.12                  
##  [25] R6_2.4.1                    GenomeInfoDb_1.20.0        
##  [27] graphlayouts_0.7.0          locfit_1.5-9.4             
##  [29] DelayedArray_0.10.0         bitops_1.0-6               
##  [31] reshape_0.8.8               fgsea_1.10.1               
##  [33] gridGraphics_0.5-0          assertthat_0.2.1           
##  [35] promises_1.1.0              scales_1.1.1               
##  [37] ggraph_2.0.3                nnet_7.3-14                
##  [39] enrichplot_1.4.0            ggExtra_0.9                
##  [41] gtable_0.3.0                tidygraph_1.2.0            
##  [43] rlang_0.4.6                 genefilter_1.66.0          
##  [45] splines_3.6.3               lazyeval_0.2.2             
##  [47] acepack_1.4.1               impute_1.58.0              
##  [49] broom_0.5.5                 europepmc_0.4              
##  [51] checkmate_2.0.0             BiocManager_1.30.10        
##  [53] yaml_2.2.1                  modelr_0.1.6               
##  [55] crosstalk_1.1.0.1           backports_1.1.8            
##  [57] httpuv_1.5.2                qvalue_2.16.0              
##  [59] Hmisc_4.4-0                 tools_3.6.3                
##  [61] ggplotify_0.0.5             ellipsis_0.3.1             
##  [63] ggridges_0.5.2              Rcpp_1.0.4.6               
##  [65] plyr_1.8.6                  base64enc_0.1-3            
##  [67] progress_1.2.2              zlibbioc_1.30.0            
##  [69] RCurl_1.98-1.2              prettyunits_1.1.1          
##  [71] rpart_4.1-15                cowplot_1.0.0              
##  [73] SummarizedExperiment_1.14.1 haven_2.2.0                
##  [75] ggrepel_0.8.2               cluster_2.1.0              
##  [77] fs_1.4.0                    data.table_1.12.8          
##  [79] DO.db_2.9                   triebeard_0.3.0            
##  [81] reprex_0.3.0                reactome.db_1.68.0         
##  [83] matrixStats_0.56.0          mime_0.9                   
##  [85] xtable_1.8-4                hms_0.5.3                  
##  [87] evaluate_0.14               XML_3.99-0.3               
##  [89] jpeg_0.1-8.1                readxl_1.3.1               
##  [91] compiler_3.6.3              crayon_1.3.4               
##  [93] htmltools_0.4.0             later_1.0.0                
##  [95] Formula_1.2-3               geneplotter_1.62.0         
##  [97] lubridate_1.7.4             DBI_1.1.0                  
##  [99] tweenr_1.0.1                dbplyr_1.4.2               
## [101] MASS_7.3-51.6               rappdirs_0.3.1             
## [103] Matrix_1.2-18               cli_2.0.2                  
## [105] igraph_1.2.5                GenomicRanges_1.36.1       
## [107] pkgconfig_2.0.3             rvcheck_0.1.8              
## [109] foreign_0.8-76              xml2_1.2.5                 
## [111] annotate_1.62.0             webshot_0.5.2              
## [113] XVector_0.24.0              rvest_0.3.5                
## [115] digest_0.6.25               graph_1.62.0               
## [117] rmarkdown_2.1               cellranger_1.1.0           
## [119] fastmatch_1.1-0             htmlTable_1.13.3           
## [121] curl_4.3                    shiny_1.4.0.2              
## [123] graphite_1.30.0             lifecycle_0.2.0            
## [125] nlme_3.1-147                jsonlite_1.7.0             
## [127] fansi_0.4.1                 pillar_1.4.4               
## [129] lattice_0.20-41             fastmap_1.0.1              
## [131] httr_1.4.1                  survival_3.1-12            
## [133] GO.db_3.8.2                 UpSetR_1.4.0               
## [135] png_0.1-7                   bit_1.1-15.2               
## [137] ggforce_0.3.1               stringi_1.4.6              
## [139] blob_1.2.1                  DESeq2_1.24.0              
## [141] latticeExtra_0.6-29         memoise_1.1.0